; vim: ts=4:et:sw=4:
; Copyleft (K) by Jose M. Rodriguez de la Rosa
;  (a.k.a. Boriel)
;  http://www.boriel.com
; -------------------------------------------------------------------
; Simple array Index routine
; The array memory starts with a 16-bit word holding (number of dimensions - 1)
; HL = Start of array memory (the first two bytes contain N-1, N = dimensions)
; Dimension values on the stack, (top of the stack, highest dimension)
; E.g. A(2, 4) -> PUSH <4>; PUSH <2>
; For any array of N dimension A(aN-1, ..., a1, a0)
; and dimensions D[bN-1, ..., b1, b0], the offset is calculated as
; O = [a0 + b0 * (a1 + b1 * (a2 + ... bN-2(aN-1)))]
; What I will do here is to calculate the following sequence:
; ((aN-1 * bN-2) + aN-2) * bN-3 + ...
;
#include once <mul16.asm>
#ifdef __CHECK_ARRAY_BOUNDARY__
#include once <error.asm>
#endif
;
__ARRAY:
                     ;- PROC
                     ;- LOCAL LOOP
                     ;- LOCAL ARRAY_END
                     ;- LOCAL RET_ADDRESS ; Stores return address
; -------------------------------------------------------------------
; Computes the address of one array element (6502 port of the Z80 code;
; the original Z80 instruction is kept after each ';-').
; In:   z80_h:z80_l = start of array memory, laid out as: 16-bit word
;       N-1, then N-1 dimension words, one element-size byte, elements.
;       Hardware stack = return address left by jsr, then one 16-bit
;       index per dimension (with __CHECK_ARRAY_BOUNDARY__ a 16-bit
;       bound word is popped before the first index).
; Out:  z80_h:z80_l = start of elements + offset * element size
; Uses: A, X, Y, flags, main and alternate BC/DE/HL cells, z80_a
; NOTE(review): 16-bit stack words are assumed to be pushed low byte
;       first, high byte second, as this file's own "push" translation
;       does - confirm against the callers.
; NOTE: the return address is patched into the operands at RET_ADDRESS
;       (self-modifying, like the Z80 original), so this must run in RAM.
; -------------------------------------------------------------------
pla                  ;- ex (sp), hl	; Return address in HL, array address in the stack
sta RET_ADDRESS+4    ; jsr left PCL on top: keep it for the final rts
pla                  ;- ld (RET_ADDRESS + 1), hl ; Stores it for later
sta RET_ADDRESS+1    ; PCH
lda z80_l            ; array address goes on the stack (low byte first)
pha
lda z80_h
pha
lda z80_c            ;- exx
ldx z80_cp
stx z80_c
sta z80_cp
lda z80_b
ldx z80_bp
stx z80_b
sta z80_bp
lda z80_e
ldx z80_ep
stx z80_e
sta z80_ep
lda z80_d
ldx z80_dp
stx z80_d
sta z80_dp
lda z80_l
ldx z80_lp
stx z80_l
sta z80_lp
lda z80_h
ldx z80_hp
stx z80_h
sta z80_hp
pla                  ;- pop hl		; Will use H'L' as the pointer
sta z80_h
pla
sta z80_l
ldy #$00             ;- ld c,(hl)	; Loads Number of dimensions from (hl)
lda (z80_hl),y
sta z80_c
inc z80_l            ;- inc hl
bne *+4
inc z80_h
ldy #$00             ;- ld b,(hl)
lda (z80_hl),y
sta z80_b
inc z80_l            ;- inc hl		; Ready
bne *+4
inc z80_h
lda z80_c            ;- exx
ldx z80_cp
stx z80_c
sta z80_cp
lda z80_b
ldx z80_bp
stx z80_b
sta z80_bp
lda z80_e
ldx z80_ep
stx z80_e
sta z80_ep
lda z80_d
ldx z80_dp
stx z80_d
sta z80_dp
lda z80_l
ldx z80_lp
stx z80_l
sta z80_lp
lda z80_h
ldx z80_hp
stx z80_h
sta z80_hp
lda #<0              ;- ld hl,0	; HL = Offset "accumulator"
sta z80_l
lda #>0
sta z80_h

#ifdef __CHECK_ARRAY_BOUNDARY__
pla                  ;- pop de
sta z80_d
pla
sta z80_e
#endif

LOOP:
pla                  ;- pop bc		; Get next index (Ai) from the stack
sta z80_b
pla
sta z80_c

#ifdef __CHECK_ARRAY_BOUNDARY__
lda z80_e            ;- ex de,hl
ldx z80_l
stx z80_e
sta z80_l
lda z80_d
ldx z80_h
stx z80_d
sta z80_h
sec                  ;- or a		; 6502: carry set = no borrow going in
lda z80_l            ;- sbc hl,bc	; HL = bound - index
sbc z80_c
sta z80_l
lda z80_h
sbc z80_b
sta z80_h
lda #ERROR_SubscriptWrong ;- ld a,ERROR_SubscriptWrong (lda/sta keep the carry)
sta z80_a
jcc __ERROR          ;- jp c,__ERROR	; Z80 borrow = 6502 carry clear
lda z80_e            ;- ex de,hl
ldx z80_l
stx z80_e
sta z80_l
lda z80_d
ldx z80_h
stx z80_d
sta z80_h
#endif

lda z80_l            ;- add hl,bc	; Adds current index
clc
adc z80_c
sta z80_l
lda z80_h
adc z80_b
sta z80_h
lda z80_c            ;- exx			; Checks if B'C' = 0
ldx z80_cp
stx z80_c
sta z80_cp
lda z80_b
ldx z80_bp
stx z80_b
sta z80_bp
lda z80_e
ldx z80_ep
stx z80_e
sta z80_ep
lda z80_d
ldx z80_dp
stx z80_d
sta z80_dp
lda z80_l
ldx z80_lp
stx z80_l
sta z80_lp
lda z80_h
ldx z80_hp
stx z80_h
sta z80_hp
lda z80_b            ;- ld a,b		; Which means we must exit (last element is not multiplied by anything)
sta z80_a
ora z80_c            ;- or c
jeq ARRAY_END        ;- jr z,ARRAY_END		; if B'Ci == 0 we are done
ldy #$00             ;- ld e,(hl)			; Loads next dimension into D'E'
lda (z80_hl),y
sta z80_e
inc z80_l            ;- inc hl
bne *+4
inc z80_h
ldy #$00             ;- ld d,(hl)
lda (z80_hl),y
sta z80_d
inc z80_l            ;- inc hl
bne *+4
inc z80_h
lda z80_e            ;- push de
pha
lda z80_d
pha
lda z80_c            ;- dec bc				; Decrements loop counter
bne *+4              ; borrow from the high byte only when the low byte is 0
dec z80_b
dec z80_c
lda z80_c            ;- exx
ldx z80_cp
stx z80_c
sta z80_cp
lda z80_b
ldx z80_bp
stx z80_b
sta z80_bp
lda z80_e
ldx z80_ep
stx z80_e
sta z80_ep
lda z80_d
ldx z80_dp
stx z80_d
sta z80_dp
lda z80_l
ldx z80_lp
stx z80_l
sta z80_lp
lda z80_h
ldx z80_hp
stx z80_h
sta z80_hp
pla                  ;- pop de				; DE = Max bound Number (i-th dimension)
sta z80_d
pla
sta z80_e


#ifdef __CHECK_ARRAY_BOUNDARY__
lda z80_e           ;- push de
pha
lda z80_d
pha
#endif
;call __MUL16_FAST	; HL *= DE
jsr __FNMUL          ;- call __FNMUL
#ifdef __CHECK_ARRAY_BOUNDARY__
pla                  ;- pop de
sta z80_d
pla
sta z80_e
lda z80_e            ;- dec de
bne *+4
dec z80_d
dec z80_e
#endif
jmp LOOP             ;- jp LOOP

ARRAY_END:
ldy #$00              ;- ld e,(hl)
lda (z80_hl),y
sta z80_e
inc z80_l            ;- inc hl
bne *+4
inc z80_h
lda z80_c            ;- ld d,c			; C = 0 => DE = E = Element size
sta z80_d
lda z80_l            ;- push hl
pha
lda z80_h
pha
lda z80_e           ;- push de
pha
lda z80_d
pha
lda z80_c            ;- exx
ldx z80_cp
stx z80_c
sta z80_cp
lda z80_b
ldx z80_bp
stx z80_b
sta z80_bp
lda z80_e
ldx z80_ep
stx z80_e
sta z80_ep
lda z80_d
ldx z80_dp
stx z80_d
sta z80_dp
lda z80_l
ldx z80_lp
stx z80_l
sta z80_lp
lda z80_h
ldx z80_hp
stx z80_h
sta z80_hp

#ifdef __BIG_ARRAY__
pla                  ;- pop de
sta z80_d
pla
sta z80_e
jsr __FNMUL          ;- call __FNMUL
#else
                     ;- LOCAL ARRAY_SIZE_LOOP
lda z80_e            ;- ex de,hl
ldx z80_l
stx z80_e
sta z80_l
lda z80_d
ldx z80_h
stx z80_d
sta z80_h
lda #<0              ;- ld hl,0
sta z80_l
lda #>0
sta z80_h
pla                  ;- pop bc
sta z80_b
pla
sta z80_c
lda z80_c            ;- ld b,c
sta z80_b
ARRAY_SIZE_LOOP:
clc                  ;- add hl,de
lda z80_l
adc z80_e
sta z80_l
lda z80_h
adc z80_d
sta z80_h
dec z80_b            ;- djnz ARRAY_SIZE_LOOP	; repeat once per byte of element size
jne ARRAY_SIZE_LOOP
;; Even faster
;pop bc
;ld d, h
;ld e, l
;dec c
;jp z, __ARRAY_FIN
;add hl, hl
;dec c
;jp z, __ARRAY_FIN
;add hl, hl
;dec c
;dec c
;jp z, __ARRAY_FIN
;add hl, de
;__ARRAY_FIN:
#endif
pla                  ;- pop de
sta z80_d
pla
sta z80_e
clc                  ;- add hl,de  ; Adds element start
lda z80_l
adc z80_e
sta z80_l
lda z80_h
adc z80_d
sta z80_h

RET_ADDRESS:
lda #$00             ;- ld de,0	; operand (RET_ADDRESS+1) patched at entry: return PCH
pha                  ;- push de	; rts pulls PCL first, so PCH is pushed first
lda #$00             ; operand (RET_ADDRESS+4) patched at entry: return PCL
pha                  ; NOTE: unlike the Z80 code, DE is not loaded with the return address
rts                  ;- ret			; HL = (Start of Elements + Offset)

;; Performs a faster multiply for little 16bit numbs
                     ;- LOCAL __FNMUL, __FNMUL2

__FNMUL:
; -------------------------------------------------------------------
; HL = HL * DE, with a repeated-addition shortcut for small DE.
; In:   z80_h:z80_l = HL, z80_d:z80_e = DE
; Out:  z80_h:z80_l = product
;       D != 0 or E >= 33 : tail-jumps to __MUL16_FAST (defined elsewhere)
;       E == 0            : returns 0
;       otherwise         : adds the old HL to itself E times
; Uses: A, X, flags, z80_b; on the short paths DE is left holding old HL
; -------------------------------------------------------------------
lda #$00             ;- xor a		; load a real zero; do not depend on z80_a
ora z80_d            ;- or d
jne __MUL16_FAST     ;- jp nz, __MUL16_FAST
lda z80_e            ;- ex de,hl	; from here L = old E, H = old D = 0
ldx z80_l
stx z80_e
sta z80_l
lda z80_d
ldx z80_h
stx z80_d
sta z80_h
lda z80_l            ;- or e		; done after the exchange, which clobbers A and Z
bne *+3              ;- ret z		; skip the rts when the multiplier is non-zero
rts
cmp #33              ;- cp 33
jcs __MUL16_FAST     ;- jp nc, __MUL16_FAST	; 6502 carry set = A >= 33
lda z80_l            ;- ld b,l		; B = loop count (old E, 1..32)
sta z80_b
lda z80_h            ;- ld l,h  ; HL = 0
sta z80_l

__FNMUL2:
clc                  ;- add hl,de
lda z80_l
adc z80_e
sta z80_l
lda z80_h
adc z80_d
sta z80_h
dec z80_b            ;- djnz __FNMUL2
jne __FNMUL2
rts                  ;- ret
                     ;- ENDP

